# 1. Import the required dependency
from lxml import etree

# 2. Parse test.html with lxml's HTML parser. The returned tree object is what
#    every XPath query below is evaluated against.
html_selector = etree.parse("test.html", etree.HTMLParser())

# Select the <title> element itself. It is kept under its own name so the
# result is not discarded when `root` is assigned just below.
title_nodes = html_selector.xpath("/html/head/title")

# Select the root <html> element.
root = html_selector.xpath("/html")

# Text content of the <title> element.
title = html_selector.xpath("/html/head/title/text()")

# Text of every <a> element anywhere in the document.
# Result recorded by the original author: the paper-book label, '80',
# the e-book label, '45' (in that order).
a = html_selector.xpath("//a/text()")

# Text of the <a> elements that carry an `id` attribute. Despite the variable
# name, the filter is on `id`, not on `href`.
# Result recorded by the original author: only the e-book label.
ahref = html_selector.xpath("//a[@id]/text()")

# Text of the <a> elements whose href is exactly "http://localhost".
# Result recorded by the original author: ['80'].
alocal = html_selector.xpath('//a[@href="http://localhost"]/text()')

# Text of the first <p> child of each <li>.
# NOTE(review): per the original author's recorded result this yields the two
# book titles (a C-language book and a Python book), not prices, so the name
# `price` is misleading; it is kept unchanged for compatibility.
price = html_selector.xpath('//li/p[1]/text()')

# Keep only the <p> elements (under an <li>) that have a child <a> whose value
# compares as less than 50, then return the text of the <a> children of those
# <p> elements.
priceInt = html_selector.xpath('//li/p[a<50]/a/text()')
print(priceInt)  # result recorded by the original author: ['45']